In [1]:
# Importing the libraries to Jupyter Notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
In [2]:
# Step 2 Read CSV file
# Load csv file to pandas dataframe =
World_Population = pd.read_csv("C:/Users/brukt/Downloads/world-population.csv")
In [3]:
World_Population
Out[3]:
| country | Year | Population | Yearly % Change | Yearly Change | Migrants (net) | Median Age | Fertility Rate | Density (P/Km²) | Urban Pop % | Urban Population | Country's Share of World Pop | World Population | Rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 2020 | 38928346 | 2.33 % | 886592 | -62920.0 | 18.4 | 4.56 | 60 | 25.4 % | 9904337 | 0.50 % | 7794798739 | 37 |
| 1 | Afghanistan | 2019 | 38041754 | 2.34 % | 869833 | -62920.0 | 17.4 | 5.26 | 58 | 25.2 % | 9582625 | 0.49 % | 7713468100 | 37 |
| 2 | Afghanistan | 2018 | 37171921 | 2.41 % | 875808 | -62920.0 | 17.4 | 5.26 | 57 | 24.9 % | 9273302 | 0.49 % | 7631091040 | 38 |
| 3 | Afghanistan | 2017 | 36296113 | 2.58 % | 913081 | -62920.0 | 17.4 | 5.26 | 56 | 24.7 % | 8971472 | 0.48 % | 7547858925 | 39 |
| 4 | Afghanistan | 2016 | 35383032 | 2.82 % | 969429 | -62920.0 | 17.4 | 5.26 | 54 | 24.5 % | 8670939 | 0.47 % | 7464022049 | 39 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3613 | Zimbabwe | 1975 | 6293875 | 3.54 % | 200914 | -9109.0 | 15.4 | 7.40 | 16 | 19.3 % | 1215331 | 0.15 % | 4079480606 | 79 |
| 3614 | Zimbabwe | 1970 | 5289303 | 3.42 % | 163625 | -8400.0 | 15.6 | 7.40 | 14 | 17.0 % | 898584 | 0.14 % | 3700437046 | 79 |
| 3615 | Zimbabwe | 1965 | 4471177 | 3.43 % | 138899 | -3002.0 | 16.0 | 7.30 | 12 | 14.4 % | 644767 | 0.13 % | 3339583597 | 91 |
| 3616 | Zimbabwe | 1960 | 3776681 | 3.28 % | 112679 | -1501.0 | 17.2 | 7.00 | 10 | 12.5 % | 472478 | 0.12 % | 3034949748 | 87 |
| 3617 | Zimbabwe | 1955 | 3213286 | 3.19 % | 93287 | -901.0 | 18.1 | 6.80 | 8 | 11.5 % | 371106 | 0.12 % | 2773019936 | 91 |
3618 rows × 14 columns
In [4]:
# Step 3 Explore the data
# Checking the first 5 rows from the data frame to understand the structure
World_Population.head()
Out[4]:
| country | Year | Population | Yearly % Change | Yearly Change | Migrants (net) | Median Age | Fertility Rate | Density (P/Km²) | Urban Pop % | Urban Population | Country's Share of World Pop | World Population | Rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 2020 | 38928346 | 2.33 % | 886592 | -62920.0 | 18.4 | 4.56 | 60 | 25.4 % | 9904337 | 0.50 % | 7794798739 | 37 |
| 1 | Afghanistan | 2019 | 38041754 | 2.34 % | 869833 | -62920.0 | 17.4 | 5.26 | 58 | 25.2 % | 9582625 | 0.49 % | 7713468100 | 37 |
| 2 | Afghanistan | 2018 | 37171921 | 2.41 % | 875808 | -62920.0 | 17.4 | 5.26 | 57 | 24.9 % | 9273302 | 0.49 % | 7631091040 | 38 |
| 3 | Afghanistan | 2017 | 36296113 | 2.58 % | 913081 | -62920.0 | 17.4 | 5.26 | 56 | 24.7 % | 8971472 | 0.48 % | 7547858925 | 39 |
| 4 | Afghanistan | 2016 | 35383032 | 2.82 % | 969429 | -62920.0 | 17.4 | 5.26 | 54 | 24.5 % | 8670939 | 0.47 % | 7464022049 | 39 |
In [5]:
# Get basic info about the database such as numbers of non-nullvalue, column names, data type
print(World_Population.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3618 entries, 0 to 3617 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 3618 non-null object 1 Year 3618 non-null int64 2 Population 3618 non-null int64 3 Yearly % Change 3618 non-null object 4 Yearly Change 3618 non-null int64 5 Migrants (net) 3618 non-null float64 6 Median Age 3618 non-null float64 7 Fertility Rate 3618 non-null float64 8 Density (P/Km²) 3618 non-null int64 9 Urban Pop % 3618 non-null object 10 Urban Population 3618 non-null object 11 Country's Share of World Pop 3618 non-null object 12 World Population 3618 non-null int64 13 Rank 3618 non-null int64 dtypes: float64(3), int64(6), object(5) memory usage: 395.8+ KB None
In [6]:
# View summary statistics for numerical columns
World_Population.describe()
Out[6]:
| Year | Population | Yearly Change | Migrants (net) | Median Age | Fertility Rate | Density (P/Km²) | World Population | Rank | |
|---|---|---|---|---|---|---|---|---|---|
| count | 3618.000000 | 3.618000e+03 | 3.618000e+03 | 3.618000e+03 | 3618.000000 | 3618.000000 | 3618.000000 | 3.618000e+03 | 3618.000000 |
| mean | 1994.166667 | 2.832801e+07 | 3.824206e+05 | -4.818684e+00 | 25.474986 | 3.887186 | 249.710614 | 5.694506e+09 | 101.052792 |
| std | 21.724088 | 1.144600e+08 | 1.429847e+06 | 1.060564e+05 | 8.218262 | 1.985077 | 1209.870554 | 1.736142e+09 | 58.121232 |
| min | 1955.000000 | 1.898500e+04 | -6.730240e+05 | -1.077397e+06 | 14.400000 | 0.850000 | 0.000000 | 2.773020e+09 | 1.000000 |
| 25% | 1975.000000 | 1.056333e+06 | 6.570250e+03 | -1.228375e+04 | 18.500000 | 2.090000 | 19.000000 | 4.079481e+09 | 51.000000 |
| 50% | 1997.500000 | 5.178470e+06 | 6.008850e+04 | -9.000000e+02 | 22.750000 | 3.400000 | 65.000000 | 5.943853e+09 | 101.000000 |
| 75% | 2016.000000 | 1.720896e+07 | 2.693282e+05 | 5.000000e+03 | 31.600000 | 5.740000 | 149.750000 | 7.464022e+09 | 151.000000 |
| max | 2020.000000 | 1.439324e+09 | 2.067648e+07 | 1.771991e+06 | 48.400000 | 8.800000 | 21645.000000 | 7.794799e+09 | 210.000000 |
In [7]:
# Step 4 cleaning datas
# Identifying missing values
World_Population.isnull().sum()
Out[7]:
country 0 Year 0 Population 0 Yearly % Change 0 Yearly Change 0 Migrants (net) 0 Median Age 0 Fertility Rate 0 Density (P/Km²) 0 Urban Pop % 0 Urban Population 0 Country's Share of World Pop 0 World Population 0 Rank 0 dtype: int64
In [8]:
# check if there is duplicated rows
World_Population.duplicated().sum()
Out[8]:
0
In [9]:
## Cleaning columns with exta space
## displaying the current column
print(World_Population.columns)
Index(['country', 'Year', 'Population', 'Yearly % Change', 'Yearly Change',
'Migrants (net)', 'Median Age', 'Fertility Rate', 'Density (P/Km²)',
'Urban Pop %', 'Urban Population', 'Country's Share of World Pop',
'World Population', 'Rank'],
dtype='object')
In [10]:
## replace the original columns by removing extra sppaces
## (1) we can either do it one by one by using this code
World_Population = World_Population.rename(columns={'Urban Pop %': 'Urban Pop %'})
## display the updated column names
print(World_Population.columns)
## (2) we can remove all extra space from all columns in the World_Population by using "str.strip"
World_Population.columns = World_Population.columns.str.strip()
Index(['country', 'Year', 'Population', 'Yearly % Change', 'Yearly Change',
'Migrants (net)', 'Median Age', 'Fertility Rate', 'Density (P/Km²)',
'Urban Pop %', 'Urban Population', 'Country's Share of World Pop',
'World Population', 'Rank'],
dtype='object')
In [11]:
World_Population
Out[11]:
| country | Year | Population | Yearly % Change | Yearly Change | Migrants (net) | Median Age | Fertility Rate | Density (P/Km²) | Urban Pop % | Urban Population | Country's Share of World Pop | World Population | Rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 2020 | 38928346 | 2.33 % | 886592 | -62920.0 | 18.4 | 4.56 | 60 | 25.4 % | 9904337 | 0.50 % | 7794798739 | 37 |
| 1 | Afghanistan | 2019 | 38041754 | 2.34 % | 869833 | -62920.0 | 17.4 | 5.26 | 58 | 25.2 % | 9582625 | 0.49 % | 7713468100 | 37 |
| 2 | Afghanistan | 2018 | 37171921 | 2.41 % | 875808 | -62920.0 | 17.4 | 5.26 | 57 | 24.9 % | 9273302 | 0.49 % | 7631091040 | 38 |
| 3 | Afghanistan | 2017 | 36296113 | 2.58 % | 913081 | -62920.0 | 17.4 | 5.26 | 56 | 24.7 % | 8971472 | 0.48 % | 7547858925 | 39 |
| 4 | Afghanistan | 2016 | 35383032 | 2.82 % | 969429 | -62920.0 | 17.4 | 5.26 | 54 | 24.5 % | 8670939 | 0.47 % | 7464022049 | 39 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3613 | Zimbabwe | 1975 | 6293875 | 3.54 % | 200914 | -9109.0 | 15.4 | 7.40 | 16 | 19.3 % | 1215331 | 0.15 % | 4079480606 | 79 |
| 3614 | Zimbabwe | 1970 | 5289303 | 3.42 % | 163625 | -8400.0 | 15.6 | 7.40 | 14 | 17.0 % | 898584 | 0.14 % | 3700437046 | 79 |
| 3615 | Zimbabwe | 1965 | 4471177 | 3.43 % | 138899 | -3002.0 | 16.0 | 7.30 | 12 | 14.4 % | 644767 | 0.13 % | 3339583597 | 91 |
| 3616 | Zimbabwe | 1960 | 3776681 | 3.28 % | 112679 | -1501.0 | 17.2 | 7.00 | 10 | 12.5 % | 472478 | 0.12 % | 3034949748 | 87 |
| 3617 | Zimbabwe | 1955 | 3213286 | 3.19 % | 93287 | -901.0 | 18.1 | 6.80 | 8 | 11.5 % | 371106 | 0.12 % | 2773019936 | 91 |
3618 rows × 14 columns
In [12]:
World_Population.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3618 entries, 0 to 3617 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 3618 non-null object 1 Year 3618 non-null int64 2 Population 3618 non-null int64 3 Yearly % Change 3618 non-null object 4 Yearly Change 3618 non-null int64 5 Migrants (net) 3618 non-null float64 6 Median Age 3618 non-null float64 7 Fertility Rate 3618 non-null float64 8 Density (P/Km²) 3618 non-null int64 9 Urban Pop % 3618 non-null object 10 Urban Population 3618 non-null object 11 Country's Share of World Pop 3618 non-null object 12 World Population 3618 non-null int64 13 Rank 3618 non-null int64 dtypes: float64(3), int64(6), object(5) memory usage: 395.8+ KB
In [13]:
# Handling missing values
# drop rows with missing values(if any)
World_Population.dropna(inplace=True)
In [14]:
World_Population.isnull().sum()
Out[14]:
country 0 Year 0 Population 0 Yearly % Change 0 Yearly Change 0 Migrants (net) 0 Median Age 0 Fertility Rate 0 Density (P/Km²) 0 Urban Pop % 0 Urban Population 0 Country's Share of World Pop 0 World Population 0 Rank 0 dtype: int64
In [15]:
# Total population for the latest year
# First define the latest year, then the total population of the latest year
latest_year = World_Population['Year'].max()
total_population_latest_year = World_Population[World_Population['Year'] == latest_year]['Population'].sum()
print(f'The World Population in {latest_year}: {total_population_latest_year}')
The World Population in 2020: 7794099349
In [16]:
latest_year = World_Population['Year'].max()
total_population_latest_year = World_Population[World_Population['Year'] == latest_year]['Population'].sum()
print(f"Total World Population in {latest_year}: {total_population_latest_year}")
Total World Population in 2020: 7794099349
In [17]:
# formatting the numbers with comma in order to separate them
total_population_formatted = f"{total_population_latest_year:,}"
print(f'The World population in {latest_year}: {total_population_formatted}')
The World population in 2020: 7,794,099,349
In [18]:
latest_year = World_Population['Year'].max()
In [19]:
population_by_country_latest_year = World_Population[World_Population['Year'] == latest_year].groupby('country')['Population'].sum()
population_by_country_latest_year.head(10)
Out[19]:
country Afghanistan 38928346 Albania 2877797 Algeria 43851044 Angola 32866272 Antigua And Barbuda 97929 Argentina 45195774 Armenia 2963243 Aruba 106766 Australia 25499884 Austria 9006398 Name: Population, dtype: int64
In [20]:
## latest year population by country(grouping)
population_by_country_latest_year = World_Population[World_Population['Year'] == latest_year].groupby('country')['Population'].sum().sort_values(ascending=False)
population_by_country_latest_year.head(10)
Out[20]:
country China 1439323776 India 1380004385 United States 331002651 Indonesia 273523615 Pakistan 220892340 Brazil 212559417 Nigeria 206139589 Bangladesh 164689383 Russia 145934462 Mexico 128932753 Name: Population, dtype: int64
In [21]:
## Average Population for latest year by country
Average_population_latest_year = World_Population[World_Population['Year'] == latest_year]['Population'].mean()
print(f'The Average Population in {latest_year}: {Average_population_latest_year}')
The Average Population in 2020: 38776613.676616915
In [22]:
## Data visualisation
# Population distribution for latest year
plt.figure(figsize=(10,6))
sns.histplot(World_Population[World_Population['Year'] == latest_year]['Population'], bins=30, kde=True)
plt.title(f'Population Distribution in {latest_year}')
plt.xlabel('Population')
plt.ylabel('Frequency')
plt.show()
C:\ProgramData\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
In [23]:
top_countries = World_Population[World_Population['Year'] == 2020].nlargest(5, 'Population')
plt.figure(figsize=(12, 6))
sns.barplot(data=top_countries, x='Population', y='country')
plt.title('Top 5 Most Populous Countries in 2020')
plt.xlabel('Population')
plt.ylabel('Country')
plt.show()
In [24]:
# TOP 10 most populated countries byy the latest year
population_by_country_latest_year = World_Population[World_Population['Year'] == latest_year].groupby('country')['Population'].sum().sort_values(ascending=False)
Top_10_countries_latest_year = population_by_country_latest_year.head(10)
plt.figure(figsize=(10,6))
sns.barplot(x=Top_10_countries_latest_year.values, y=Top_10_countries_latest_year.index, palette='viridis')
plt.title(f'Top 10 Most Populated countries by {latest_year}')
plt.xlabel('Population')
plt.ylabel('country')
plt.show()
In [25]:
import plotly.express as px
In [26]:
# Interactive plot with plotly
fig = px.line(World_Population, x='Year', y='Population', color='country', title='Population Growth Over Time')
fig.show()
In [27]:
#### Population growth analysis
# Plot population growth for a specific country
country = 'United Kingdom'
country_data = World_Population[World_Population['country'] == country]
plt.figure(figsize=(10,6))
plt.plot(country_data['Year'],country_data['Population'], marker = 'o')
plt.title(f'Population Growth in {country}')
plt.xlabel('Year')
plt.ylabel('Population')
plt.grid(True)
plt.show()
In [28]:
# plotting the median age trend for a specific cocountry
# median age trend of the united kingdon
country = 'United Kingdom'
country_data = World_Population[World_Population['country'] == country]
plt.figure(figsize=(12,6))
plt.plot(country_data['Year'], country_data['Population'], marker = "o", color = 'green', linestyle='-', linewidth=0.5, alpha= 0.7)
plt.title(f'The Median Age in {country}')
plt.xlabel('Year')
plt.ylabel('Median Age')
plt.grid(True)
plt.show()
In [29]:
## Population growth ananlysis
# camparative analysis betweeen united kingdom and united france
# first we determine the countries
country1 = 'France'
country2 = 'United Kingdom'
# 2nd determining the population of each country
country1_data = World_Population[World_Population['country'] == country1]
country2_data = World_Population[World_Population['country'] == country2]
# 3rd ploting the analysis using graph
plt.figure(figsize=(10,6))
plt.plot(country1_data['Year'], country1_data['Population'], marker = '*', label = country1, color = 'purple')
plt.plot(country2_data['Year'], country2_data['Population'], marker = 'o', label = country2, color = 'orange')
plt.title('The Population Growth comparison')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.grid(True)
plt.show()
In [30]:
import plotly.express as px
In [31]:
# Ploting an urban population percentage trend for a specific country
plt.figure(figsize=(10,6))
plt.plot(country_data['Year'], country_data['Urban Pop %'], marker = 'o', color = 'violet')
plt.title(f'Urban Population Percentage in {country}')
plt.xlabel('Year')
plt.ylabel('Urban pop %')
plt.grid(True)
plt.show()
Correlation Analysis¶
In [32]:
# correlation analysis : These insights help in understanding the demographic and population dynamics across different countries over time.
# 1 st Filter out non-numeric columns because the world population data frame consists not only numbers but floats,...
numeric_df = World_Population.select_dtypes(include=[float, int])
correlation_matrix= numeric_df.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
In [33]:
#### the correlation int this data heatmap shows that the relation ship between different numerical variables ranging from -1 to 1 indicates:
# (1) Year: Positively correlated with Population (0.075), Yearly Change (0.03), Density (P/Km²) (0.07), and World Population (1).
# Negatively correlated with Median Age (-0.53) and Rank (-0.0012).
# This suggests that as time progresses, the world population and density increase, while median age decreases.
# (2) Population: Positively correlated with Year (0.075), Yearly Change (0.31), Density (P/Km²) (0.41), and World Population (1).
# Negatively correlated with Rank (-0.53).
# This implies that countries with higher populations tend to have a higher yearly change and density but a lower rank.
# (3) Yearly change: Positively correlated with Population (0.31) and Density (P/Km²) (0.15).
# This indicates that countries with a higher population tend to have a higher yearly change in population.
# (4) Migrants rate: No strong correlations with other variables.
# This suggests that net migration does not significantly impact other variables in this dataset.
# (5) Median age: Negatively correlated with Population (-0.53) and Yearly Change (-0.5).
# Positively correlated with Fertility Rate (0.31).
# This indicates that countries with a higher median age tend to have lower populations and yearly changes but higher fertility rates.
# (6) Fertility Rate: Positively correlated with Median Age (0.31).
# This suggests that higher fertility rates are associated with a higher median age in the population.
# (7) Density (p/km^2) : Positively correlated with Population (0.41) and Yearly Change (0.15).
# This implies that higher population density is associated with higher populations and yearly changes.
# (8) Word population: Perfectly correlated with Year (1), indicating that world population data changes consistently with the year.
# Positively correlated with Population and Density.
# (9) Rank: Negatively correlated with Population (-0.53).
# This suggests that countries with a higher population have a lower rank (where a lower rank number indicates a higher population rank).
# conclussion:
# Population Growth: As the year progresses, the world population grows, which is expected.
# Population Density: Higher population countries tend to have higher population densities.
# Median Age: Higher populations and yearly changes are associated with a lower median age, suggesting that younger populations are growing faster.
# Fertility Rate: There is a complex relationship where higher fertility rates are found in populations with a higher median age.
In [34]:
World_Population.to_csv("C:/Users/brukt/Downloads/world-population.csv", index=False)
In [35]:
World_Population = pd.read_csv("C:/Users/brukt/Downloads/world-population.csv")
In [36]:
World_Population
Out[36]:
| country | Year | Population | Yearly % Change | Yearly Change | Migrants (net) | Median Age | Fertility Rate | Density (P/Km²) | Urban Pop % | Urban Population | Country's Share of World Pop | World Population | Rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 2020 | 38928346 | 2.33 % | 886592 | -62920.0 | 18.4 | 4.56 | 60 | 25.4 % | 9904337 | 0.50 % | 7794798739 | 37 |
| 1 | Afghanistan | 2019 | 38041754 | 2.34 % | 869833 | -62920.0 | 17.4 | 5.26 | 58 | 25.2 % | 9582625 | 0.49 % | 7713468100 | 37 |
| 2 | Afghanistan | 2018 | 37171921 | 2.41 % | 875808 | -62920.0 | 17.4 | 5.26 | 57 | 24.9 % | 9273302 | 0.49 % | 7631091040 | 38 |
| 3 | Afghanistan | 2017 | 36296113 | 2.58 % | 913081 | -62920.0 | 17.4 | 5.26 | 56 | 24.7 % | 8971472 | 0.48 % | 7547858925 | 39 |
| 4 | Afghanistan | 2016 | 35383032 | 2.82 % | 969429 | -62920.0 | 17.4 | 5.26 | 54 | 24.5 % | 8670939 | 0.47 % | 7464022049 | 39 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3613 | Zimbabwe | 1975 | 6293875 | 3.54 % | 200914 | -9109.0 | 15.4 | 7.40 | 16 | 19.3 % | 1215331 | 0.15 % | 4079480606 | 79 |
| 3614 | Zimbabwe | 1970 | 5289303 | 3.42 % | 163625 | -8400.0 | 15.6 | 7.40 | 14 | 17.0 % | 898584 | 0.14 % | 3700437046 | 79 |
| 3615 | Zimbabwe | 1965 | 4471177 | 3.43 % | 138899 | -3002.0 | 16.0 | 7.30 | 12 | 14.4 % | 644767 | 0.13 % | 3339583597 | 91 |
| 3616 | Zimbabwe | 1960 | 3776681 | 3.28 % | 112679 | -1501.0 | 17.2 | 7.00 | 10 | 12.5 % | 472478 | 0.12 % | 3034949748 | 87 |
| 3617 | Zimbabwe | 1955 | 3213286 | 3.19 % | 93287 | -901.0 | 18.1 | 6.80 | 8 | 11.5 % | 371106 | 0.12 % | 2773019936 | 91 |
3618 rows × 14 columns
In [37]:
World_Population.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3618 entries, 0 to 3617 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 3618 non-null object 1 Year 3618 non-null int64 2 Population 3618 non-null int64 3 Yearly % Change 3618 non-null object 4 Yearly Change 3618 non-null int64 5 Migrants (net) 3618 non-null float64 6 Median Age 3618 non-null float64 7 Fertility Rate 3618 non-null float64 8 Density (P/Km²) 3618 non-null int64 9 Urban Pop % 3618 non-null object 10 Urban Population 3618 non-null object 11 Country's Share of World Pop 3618 non-null object 12 World Population 3618 non-null int64 13 Rank 3618 non-null int64 dtypes: float64(3), int64(6), object(5) memory usage: 395.8+ KB
TO VISUALISE THE DATA ON A MAP¶
In [38]:
from geopy.geocoders import Nominatim
from IPython.display import IFrame, display
import time
import folium
In [39]:
# filtering the world data for the year 2020
world_pop_2020 = World_Population[World_Population['Year'] == 2020]
In [40]:
geolocator = Nominatim(user_agent="geoapiWorld")
def get_coordinates(country):
try:
location = geolocator.geocode(country)
return (location.latitude, location.longitude)
except:
return (None, None)
# Get coordinates for each country for the year 2020
world_pop_2020['coordinates'] = world_pop_2020['country'].apply(get_coordinates)
# Drop rows where coordinates could not be found
world_pop_2020 = world_pop_2020.dropna(subset=['coordinates'])
# Separate latitude and longitude into separate columns
world_pop_2020['latitude'] = world_pop_2020['coordinates'].apply(lambda x: x[0])
world_pop_2020['longitude'] = world_pop_2020['coordinates'].apply(lambda x: x[1])
C:\Users\brukt\AppData\Local\Temp\ipykernel_11812\1735024007.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
In [41]:
# Create a map centered around the mean latitude and longitude
# to change the map style use 'tiles='OpenStreetMap','Stamen Terrain','Stamen Toner',
#'Stamen Watercolor','CartoDB positron','CartoDB dark_matter'
m = folium.Map(
location=[world_pop_2020['latitude'].mean(), world_pop_2020['longitude'].mean()],
zoom_start=2,
tiles='Openstreetmap'
)
# Add points to the map
for _, row in world_pop_2020.iterrows():
folium.CircleMarker(
location=[row['latitude'], row['longitude']],
radius=5,
popup=(
f"Country: {row['country']}<br>"
f"Population: {row['Population']}<br>"
f"Yearly % Change: {row['Yearly % Change']}<br>"
f"Median Age: {row['Median Age']}<br>"
f"Fertility Rate: {row['Fertility Rate']}<br>"
f"Density (P/Km²): {row['Density (P/Km²)']}<br>"
f"Urban Pop %: {row['Urban Pop %']}<br>"
f"Urban Population: {row['Urban Population']}<br>"
),
color='blue',
fill=True,
fill_color='blue'
).add_to(m)
# Save the map to an HTML file
m.save('map.html')
In [42]:
# to display the map with in the notebook
display(IFrame('map.html', width=700, height=500))
In [ ]: